# Google Sheets permissions did not work with my Google account.
# Use googlesheets4 to get the data.
url <- "https://docs.google.com/spreadsheets/d/1IPS5dBSGtwYVbjsfbaMCYIWnOuRmJcbequohNxCyGVw/edit?resourcekey#gid=1625408792"

# Google Sheets authorisation
googlesheets4::gs4_auth()

# Load the "Ask a Manager 2021 Survey" Google Sheet (https://www.askamanager.org/)
# and pass it through janitor::clean_names().
ask_a_manager_2021 <- janitor::clean_names(googlesheets4::read_sheet(url))

# If googlesheets4 is not working, read the local copy instead:
# ask_a_manager_2021 <- read_csv(here::here("data", "ask_a_manager_2021.csv")) %>%
# janitor::clean_names()
skimr::skim(ask_a_manager_2021)
| Name | ask_a_manager_2021 |
| Number of rows | 26776 |
| Number of columns | 18 |
| _______________________ | |
| Column type frequency: | |
| character | 14 |
| list | 2 |
| numeric | 1 |
| POSIXct | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| how_old_are_you | 0 | 1.00 | 5 | 10 | 0 | 7 | 0 |
| industry | 62 | 1.00 | 2 | 171 | 0 | 1084 | 0 |
| job_title | 0 | 1.00 | 1 | 126 | 0 | 12843 | 0 |
| additional_context_on_job_title | 19876 | 0.26 | 1 | 781 | 0 | 6615 | 0 |
| currency | 0 | 1.00 | 3 | 7 | 0 | 11 | 0 |
| additional_context_on_income | 23860 | 0.11 | 1 | 1143 | 0 | 2840 | 0 |
| country | 0 | 1.00 | 1 | 209 | 0 | 297 | 0 |
| state | 4761 | 0.82 | 4 | 114 | 0 | 125 | 0 |
| city | 8 | 1.00 | 1 | 171 | 0 | 4072 | 0 |
| overall_years_of_professional_experience | 0 | 1.00 | 9 | 16 | 0 | 8 | 0 |
| years_of_experience_in_field | 0 | 1.00 | 9 | 16 | 0 | 8 | 0 |
| highest_level_of_education_completed | 202 | 0.99 | 3 | 34 | 0 | 6 | 0 |
| gender | 155 | 0.99 | 3 | 29 | 0 | 5 | 0 |
| race | 151 | 0.99 | 5 | 125 | 0 | 47 | 0 |
Variable type: list
| skim_variable | n_missing | complete_rate | n_unique | min_length | max_length |
|---|---|---|---|---|---|
| other_monetary_comp | 0 | 1 | 809 | 0 | 1 |
| currency_other | 0 | 1 | 105 | 0 | 1 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| annual_salary | 0 | 1 | 144962 | 5487030 | 0 | 54000 | 75718 | 110000 | 8.7e+08 | ▇▁▁▁▁ |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| timestamp | 0 | 1 | 2021-04-27 11:02:09 | 2021-09-17 23:57:58 | 2021-04-28 12:36:21 | 26771 |
# unique(ask_a_manager_2021$country) reveals that country names are written
# inconsistently across the data, so they are normalised in two passes.
# With `nomatch = NULL` countrycode keeps the original value wherever it finds
# no match, so unrecognised entries survive for the manual clean-up that follows.
ask_a_manager_2021_cleaned <- ask_a_manager_2021 %>%
  # Pass 1: values recognised as country names -> canonical English names.
  mutate(
    country = countrycode::countrycode(
      sourcevar = country,
      origin = "country.name",
      destination = "country.name",
      warn = TRUE,
      nomatch = NULL
    )
  ) %>%
  # Pass 2: values in countrycode's 'unicode.symbol' scheme -> country names.
  mutate(
    country = countrycode::countrycode(
      sourcevar = country,
      origin = "unicode.symbol",
      destination = "country.name",
      warn = TRUE,
      nomatch = NULL
    )
  )

# Reference list of canonical English country names.
country_list <- unique(countryname_dict$country.name.en)

# Rows whose country is still not a canonical name after both passes.
typos <- ask_a_manager_2021_cleaned %>%
  filter(!(country %in% country_list))
unique(typos$country) # identify the typos and mismatches that the countrycode library could not resolve
## [1] "Scotland"
## [2] "England"
## [3] "ISA"
## [4] "United State"
## [5] "America"
## [6] "United State of America"
## [7] "United Stated"
## [8] "Northern Ireland"
## [9] "Contracts"
## [10] "USA-- Virgin Islands"
## [11] "United Statws"
## [12] "England/UK"
## [13] "We don't get raises, we get quarterly bonuses, but they periodically asses income in the area you work, so I got a raise because a 3rd party assessment showed I was paid too little for the area we were located"
## [14] "Unites States"
## [15] "England, UK."
## [16] "U. S."
## [17] "United Sates"
## [18] "Global"
## [19] "Uniited States"
## [20] "Worldwide (based in US but short term trips aroudn the world)"
## [21] "Canadw"
## [22] "United Sates of America"
## [23] "United States (I work from home and my clients are all over the US/Canada/PR"
## [24] "Unted States"
## [25] "United Statesp"
## [26] "United Stattes"
## [27] "United Statea"
## [28] "United Statees"
## [29] "Can"
## [30] "Uniyed states"
## [31] "Uniyes States"
## [32] "U.A."
## [33] "United Kindom"
## [34] "United Status"
## [35] "Currently finance"
## [36] "UXZ"
## [37] "England, UK"
## [38] "Canda"
## [39] "Canada and USA"
## [40] "Catalonia"
## [41] "$2,175.84/year is deducted for benefits"
## [42] "Jersey, Channel islands"
## [43] "Virginia"
## [44] "USS"
## [45] "Uniteed States"
## [46] "Hartford"
## [47] "Csnada"
## [48] "United Stares"
## [49] "I.S."
## [50] "UK (Northern Ireland)"
## [51] "UK for U.S. company"
## [52] "Unites states"
## [53] "NZ"
## [54] "Canad"
## [55] "Unite States"
## [56] "The US"
## [57] "Remote"
## [58] "IS"
## [59] "Australi"
## [60] "England, Gb"
## [61] "Danmark"
## [62] "U.K. (northern England)"
## [63] "NL"
## [64] "Nederland"
## [65] "Englang"
## [66] "United statew"
## [67] "bonus based on meeting yearly goals set w/ my supervisor"
## [68] "International"
## [69] "Wales"
## [70] "I earn commission on sales. If I meet quota, I'm guaranteed another 16k min. Last year i earned an additional 27k. It's not uncommon for people in my space to earn 100k+ after commission."
## [71] "United Statues"
## [72] "Untied States"
## [73] "UK (England)"
## [74] "UK, remote"
## [75] "Scotland, UK"
## [76] "USAB"
## [77] "Unitied States"
## [78] "United Sttes"
## [79] "Unites kingdom"
## [80] "united stated"
## [81] "Panamá"
## [82] "england"
## [83] "I was brought in on this salary to help with the EHR and very quickly was promoted to current position but compensation was not altered."
## [84] "Uniter Statez"
## [85] "U. S"
## [86] "Brasil"
## [87] "United Stateds"
## [88] "n/a (remote from wherever I want)"
## [89] "Africa"
## [90] "San Francisco"
## [91] "Usat"
## [92] "Unitef Stated"
## [93] "UA"
## [94] "Wales, UK"
## [95] "USaa"
## [96] "america"
## [97] "United States- Puerto Rico"
## [98] "Y"
## [99] "United y"
## [100] "Wales (UK)"
## [101] "europe"
## [102] "California"
## [103] "UK, but for globally fully remote company"
## [104] "México"
## [105] "USD"
## [106] "United Statss"
## [107] "ARGENTINA BUT MY ORG IS IN THAILAND"
## [108] "I work for a UAE-based organization, though I am personally in the US."
## [109] "United States"
## [110] "na"
## [111] "Policy"
# Map from a canonical country name (hash key) to the list of raw survey
# entries that should be rewritten to it. The entries were collected from the
# unique(typos$country) output: values countrycode could not resolve.
#
# Changes from the previous version of this table:
#   * "NZ" now maps to "New Zealand" (it was filed under "NA").
#   * "UXZ" is listed once, under "NA". It used to appear under both
#     "United States" and "NA".
#   * the duplicated "England, Gb" entry was removed.
typo_map <- hash()

typo_map[["United Kingdom"]] <- list(
  "Scotland",
  "England",
  "Northern Ireland",
  "England/UK",
  "England, UK",
  "United Kindom",
  "UK (Northern Ireland)",
  "UK for U.S. company",
  "England, Gb",
  "Englang",
  "U.K. (northern England)",
  "Unites kingdo",
  "Unites kingdom",
  "UK (England)",
  "england",
  "Jersey",
  "Channel islands",
  "UK, remote",
  "Scotland, UK",
  "England, UK.",
  "Jersey, Channel islands",
  "Wales, UK",
  "Wales (UK)",
  "UK, but for globally fully remote company",
  "Wales"
)

typo_map[["United States"]] <- list(
  "United State",
  "ISA",
  "America",
  "United State of America",
  "United Stated",
  "USA-- Virgin Islands",
  "United Statws",
  "Unites States",
  "U. S.",
  "United Sates",
  "Uniited States",
  "Worldwide (based in US but short term trips aroudn the world)",
  "United Sates of America",
  "United States (I work from home and my clients are all over the US/Canada/PR",
  "Unted States",
  "United Statesp",
  "United Stattes",
  "United Statea",
  "United Statees",
  "Uniyed states",
  "Uniyes States",
  "United Status",
  "Uniteed States",
  "United Stares",
  "Unites states",
  "Unite States",
  "The US",
  "United statew",
  "United Statues",
  "Untied States",
  "Unitied States",
  "United Sttes",
  "united stated",
  "Uniter Statez",
  "U. S",
  "United Stateds",
  "Usat",
  "Unitef Stated",
  "USaa",
  "america",
  "United States- Puerto Rico",
  "California",
  "Virginia",
  "Hartford",
  "San Francisco",
  "USD",
  "United Statss",
  "I work for a UAE-based organization, though I am personally in the US.",
  "United States",
  "U.A.",
  "USS",
  "IS",
  "USAB",
  "UA",
  "I.S",
  "I.S.",
  "United y"
)

typo_map[["Canada"]] <- list(
  "Canadw",
  "Can",
  "Canda",
  "Canada and USA",
  "Csnada",
  "Canad"
)

# Entries that do not identify a country. The key is the literal string "NA"
# (not a missing value), so these rows end up with country == "NA".
typo_map[["NA"]] <- list(
  "Contracts",
  "We don't get raises, we get quarterly bonuses, but they periodically asses income in the area you work, so I got a raise because a 3rd party assessment showed I was paid too little for the area we were located",
  "Global",
  "Currently finance",
  "UXZ",
  "$2,175.84/year is deducted for benefits",
  "Remote",
  "bonus based on meeting yearly goals set w/ my supervisor",
  "I earn commission on sales. If I meet quota, I'm guaranteed another 16k min. Last year i earned an additional 27k. It's not uncommon for people in my space to earn 100k+ after commission.",
  "I was brought in on this salary to help with the EHR and very quickly was promoted to current position but compensation was not altered.",
  "n/a (remote from wherever I want)",
  "Africa",
  "europe",
  "na",
  "Policy",
  "NA",
  "International",
  "Y"
)

typo_map[["New Zealand"]] <- list("NZ")
typo_map[["Spain"]] <- list("Catalonia")
typo_map[["Australia"]] <- list("Australi")
typo_map[["Denmark"]] <- list("Danmark")
typo_map[["Netherlands"]] <- list("NL", "Nederland")
typo_map[["Panama"]] <- list("Panamá")
typo_map[["Brazil"]] <- list("Brasil")
typo_map[["Argentina"]] <- list("ARGENTINA BUT MY ORG IS IN THAILAND")
typo_map[["Mexico"]] <- list("México")
# Rewrite every known misspelling to its canonical country name, one canonical
# name (hash key) at a time. Values that are not listed for the current key,
# including missing values, are left untouched.
# The loop variable is deliberately not called `c`, so it does not mask
# base::c() in the global environment once the loop has finished.
for (canonical_name in keys(typo_map)) {
  # Look the variants up once per key instead of once per case_when() branch.
  known_variants <- unlist(typo_map[[canonical_name]])
  ask_a_manager_2021_cleaned <- ask_a_manager_2021_cleaned %>%
    mutate(
      country = if_else(country %in% known_variants, canonical_name, country)
    )
}
# Derive the ISO 3166-1 alpha-3 code ('iso3c') from the cleaned country name.
# Names that countrycode cannot match become NA and raise a warning.
ask_a_manager_2021_cleaned <- ask_a_manager_2021_cleaned %>%
  mutate(
    country_code = countrycode::countrycode(
      sourcevar = country,
      origin = "country.name",
      destination = "iso3c",
      nomatch = NA,
      warn = TRUE
    )
  )

# Re-run the typo check: only the "NA" placeholder should remain.
typos <- ask_a_manager_2021_cleaned %>%
  filter(!country %in% unique(countryname_dict$country.name.en))
unique(typos$country)
## [1] "NA"
unique(ask_a_manager_2021_cleaned$country)
## [1] "United States" "United Kingdom" "Canada"
## [4] "Netherlands" "Australia" "Spain"
## [7] "Finland" "France" "Germany"
## [10] "Ireland" "India" "Argentina"
## [13] "Denmark" "Switzerland" "Bermuda"
## [16] "Malaysia" "Mexico" "South Africa"
## [19] "Belgium" "Sweden" "Hong Kong SAR China"
## [22] "Kuwait" "Norway" "Sri Lanka"
## [25] "NA" "Greece" "Japan"
## [28] "Austria" "Brazil" "Hungary"
## [31] "Luxembourg" "Colombia" "New Zealand"
## [34] "Trinidad & Tobago" "Cayman Islands" "Czechia"
## [37] "Latvia" "Puerto Rico" "Rwanda"
## [40] "United Arab Emirates" "Bangladesh" "Romania"
## [43] "Serbia" "Philippines" "Russia"
## [46] "Poland" "Turkey" "Italy"
## [49] "China" "Afghanistan" "Israel"
## [52] "Taiwan" "Cambodia" "Vietnam"
## [55] "Singapore" "South Korea" "Thailand"
## [58] "Lithuania" "Eritrea" "Indonesia"
## [61] "Cuba" "Slovenia" "Côte d’Ivoire"
## [64] "Somalia" "Slovakia" "Portugal"
## [67] "Sierra Leone" "Bahamas" "Costa Rica"
## [70] "Chile" "Qatar" "Nigeria"
## [73] "Panama" "Congo - Brazzaville" "Uruguay"
## [76] "Pakistan" "Uganda" "Malta"
## [79] "Saudi Arabia" "Bulgaria" "Estonia"
## [82] "Morocco" "Ecuador" "Zimbabwe"
## [85] "Ghana" "Croatia" "Ukraine"
## [88] "Isle of Man" "Jamaica" "Kenya"
## [91] "Jordan"
# Currency conversion ----
# Keep only the first three characters of each currency label so that it can
# be pasted into a quote symbol below.
ask_a_manager_2021_cleaned$currency <-
  substr(ask_a_manager_2021_cleaned$currency, 1, 3)

# Ask for one quote per distinct currency instead of one per survey row.
from <- unique(ask_a_manager_2021_cleaned$currency)
to <- "USD"
exchange_rate <- getQuote(paste0(from, to, "=X"))

# Recover each rate's currency from the first three characters of its row
# label, so the mapping no longer depends on a fixed row position or a
# hand-typed, order-sensitive list of codes.
# NOTE(review): assumes getQuote() labels each row with the requested symbol;
# the check below stops the script if that does not hold.
exchange_rate$currency <- substr(rownames(exchange_rate), 1, 3)
stopifnot(all(exchange_rate$currency %in% from))

exchange_rate <- exchange_rate %>%
  select(currency, Last) %>%
  clean_names()

# Left join: respondents whose currency has no usable quote keep their row and
# get a missing rate (and therefore a missing salary_usd).
ask_a_manager_2021_cleaned <- merge(
  ask_a_manager_2021_cleaned,
  exchange_rate,
  by = "currency",
  all.x = TRUE
)

ask_a_manager_2021_cleaned <- ask_a_manager_2021_cleaned %>%
  mutate(salary_usd = annual_salary * last) %>%
  clean_names()

# Industry labels treated as the standard choices; any other value of
# `industry` is handled as a free-text "other" answer further down.
standard_industry <- c(
  "Accounting, Banking & Finance",
  "Agriculture or Forestry",
  "Art & Design",
  "Business or Consulting",
  "Computing or Tech",
  "Education (Primary/Secondary)",
  "Education (Higher Education)",
  "Engineering or Manufacturing",
  "Entertainment",
  "Government and Public Administration",
  "Health care",
  "Hospitality & Events",
  "Insurance",
  "Law",
  "Law Enforcement & Security",
  "Leisure, Sport & Tourism",
  "Marketing, Advertising & PR",
  "Media & Digital",
  "Nonprofits",
  "Property or Construction",
  "Recruitment or HR",
  "Retail",
  "Sales",
  "Social Work",
  "Transport or Logistics",
  "Utilities & Telecommunications"
)
# Extra industry buckets added on top of `standard_industry`.
# These strings are used verbatim as `industry_map` keys (e.g.
# "Food & Bevarage" further down), so their spelling must not be changed here
# without changing every use.
non_standard_industry <- c(
  "Mining and Mentals",
  "Aerospace & Defence",
  "Automotive",
  "Biotechnology",
  "Church",
  "Food & Bevarage",
  "Animal Care",
  "Library & Publishing",
  "Fashion"
)
# industry_map: industry label -> character vector of free-text answers
# assigned to that industry. Every standard and non-standard label starts out
# holding a single empty string.
industry_map <- hash()
for (industry_label in c(standard_industry, non_standard_industry)) {
  industry_map[[industry_label]] <- ""
}
# Rows whose industry is one of the standard choices (printed for inspection).
ask_a_manager_2021_cleaned %>%
  filter(industry %in% standard_industry)
# Roughly 2000 people used the "other" option within the industry section.

# Identify all answers that were put down under the "other" category.
outliers_table <- ask_a_manager_2021_cleaned %>%
  select(industry) %>%
  filter(!(industry %in% standard_industry)) %>%
  mutate(industry = tolower(industry))

# The same answers as a plain vector: distinct raw values, then lower-cased.
is_other_industry <- !(ask_a_manager_2021_cleaned$industry %in% standard_industry)
outliers <- tolower(unique(ask_a_manager_2021_cleaned$industry[is_other_industry]))

science_related <- grep("science", outliers, value = TRUE)
# Higher education
# Higher education ----
# Start from every answer containing "academ", then drop the ones that say
# they are not academic. Logical indexing is used for the exclusions because
# x[-grep(...)] returns an empty vector when the pattern matches nothing.
higher_education <- grep("academ", outliers, value = TRUE)
for (excluded in c("non-aca", "not")) {
  higher_education <- higher_education[!grepl(excluded, higher_education)]
}
# Matches of each later pattern are placed in front of the earlier ones.
for (pattern in c("higher educ", "university", "college")) {
  higher_education <- c(grep(pattern, outliers, value = TRUE), higher_education)
}
industry_map[["Education (Higher Education)"]] <- higher_education
outliers_classified <- industry_map[["Education (Higher Education)"]]

# Sales; Accounting, Banking & Finance ----
# For each industry, prepend the answers matching each pattern (in the order
# given) to its entry, then prepend the finished entry to outliers_classified.
# NOTE(review): "acturial" looks like a misspelling of "actuarial"; it is kept
# unchanged here so the classification result does not change.
sales_finance_patterns <- list(
  "Sales" = c("procurement"),
  "Accounting, Banking & Finance" = c("acturial", "finance", "private equity")
)
for (industry_label in names(sales_finance_patterns)) {
  for (pattern in sales_finance_patterns[[industry_label]]) {
    industry_map[[industry_label]] <- c(
      grep(pattern, outliers, value = TRUE),
      industry_map[[industry_label]]
    )
  }
  outliers_classified <- c(industry_map[[industry_label]], outliers_classified)
}

# Engineering or Manufacturing ----
# The entry is reset first. The lookahead pattern keeps answers that mention
# "manufactur" but neither "food" nor "aerospace".
industry_map[["Engineering or Manufacturing"]] <- c()
for (pattern in c("^(?=.*manufactur)(?!.*food)(?!.*aerospace)", "mining", "oil")) {
  industry_map[["Engineering or Manufacturing"]] <- c(
    grep(pattern, outliers, perl = TRUE, value = TRUE),
    industry_map[["Engineering or Manufacturing"]]
  )
}
industry_map[["Engineering or Manufacturing"]]
## [1] "oil and gas"
## [2] "trades (supply chain) oil and gas"
## [3] "environment - oil and gas"
## [4] "oil & gas"
## [5] "oilfield adjacent"
## [6] "oil and gas"
## [7] "oil and gas exploration"
## [8] "oil"
## [9] "gas & oil"
## [10] "oil and gas safety training"
## [11] "oil & gas"
## [12] "energy: oil & gas"
## [13] "oil & gas - non destructive testing"
## [14] "energy (oil & gas & associated products, renewable power, etc)"
## [15] "energy - oil and gas"
## [16] "energy, oil & gas"
## [17] "energy/oil"
## [18] "energy sector: oil & gas"
## [19] "energy (oil & gas)"
## [20] "oil & gas"
## [21] "mining"
## [22] "mining and natural resources"
## [23] "mining & mineral processing"
## [24] "construction, mining, manufacturing"
## [25] "engineering - mining"
## [26] "mining/resource extraction"
## [27] "mining/mineral exploration"
## [28] "manufacturing"
## [29] "construction, mining, manufacturing"
## [30] "manufacturing security systems"
## [31] "soft drinks manufacturing"
## [32] "security and manufacturing company"
## [33] "animal health product manufacturing"
## [34] "manufacturing and distributing"
## [35] "apparel manufacture"
## [36] "pharma / medical device design and manufacturing"
## [37] "chemical manufacturing"
## [38] "manufacturing"
## [39] "high end outdoor furniture manufacturer"
## [40] "pharmaceutical manufacturing"
## [41] "synthetic chemical manufacturing"
## [42] "manufacturing (pharmaceuticals)"
## [43] "manufacturing/wholesale"
## [44] "cbd manufacturing"
## [45] "manufacturing : corporate admin support"
## [46] "biotech manufacturing"
## [47] "production and manufacturing"
## [48] "pharmaceutical manufacturing"
## [49] "manufacturing, chemical"
## [50] "r&d in manufacturing"
## [51] "wholesale textile manufacture and sales"
## [52] "manufacturing"
## [53] "pharmaceitical manufacturing"
## [54] "beauty manufacturing & education"
## [55] "manufacturing (medical devices)"
## [56] "manufacturing (personal care)"
# Record the Engineering or Manufacturing answers as classified.
outliers_classified <- c(
  industry_map[["Engineering or Manufacturing"]],
  outliers_classified
)

# Health care ----
# The entry is reset first. The lookahead pattern keeps answers that mention
# "medical" but not "not".
industry_map[["Health care"]] <- c()
industry_map[["Health care"]] <- c(
  grep("^(?=.*medical)(?!.*not)", outliers, perl = TRUE, value = TRUE),
  industry_map[["Health care"]]
)
# Matches of each later pattern are placed in front of the earlier ones.
# Overlapping patterns ("pharmaceuticals" / "pharma", "bio") add the same
# answer more than once, as the printed result below shows.
for (pattern in c("pharmaceuticals", "beauty", "bio", "pharma", "health")) {
  industry_map[["Health care"]] <- c(
    grep(pattern, outliers, value = TRUE),
    industry_map[["Health care"]]
  )
}
outliers_classified <- c(industry_map[["Health care"]], outliers_classified)
industry_map[["Health care"]]
## [1] "not-for-profit health research consulting"
## [2] "research (health)"
## [3] "public health"
## [4] "animal health product manufacturing"
## [5] "public health in higher education"
## [6] "health insurance"
## [7] "healthcare it"
## [8] "public health"
## [9] "public health research"
## [10] "public/environmental health"
## [11] "environment, health, and safety"
## [12] "environmental health + pest control"
## [13] "public health"
## [14] "behavioral health"
## [15] "health research"
## [16] "research - public health"
## [17] "mental health"
## [18] "mental health"
## [19] "mental health therapist"
## [20] "environmental health and safety compliance"
## [21] "health and fitness"
## [22] "healthcare information technology"
## [23] "animal health industry"
## [24] "global health consulting"
## [25] "animal health"
## [26] "non-profit health care (i couldn’t select both)"
## [27] "public health- state level"
## [28] "mental health"
## [29] "behavior analysis/mental health"
## [30] "healthcare technology"
## [31] "public health, local government"
## [32] "public health (not medical)"
## [33] "environmental health and safety"
## [34] "environmental health and safety"
## [35] "pharmaceutical"
## [36] "pharmaceutical research"
## [37] "pharmaceuticals"
## [38] "biotech pharmaceuticals"
## [39] "pharma"
## [40] "pharmaceutical research & development"
## [41] "pharma"
## [42] "biopharma"
## [43] "pharma / medical device design and manufacturing"
## [44] "pharmaceutical"
## [45] "pharma/biotech"
## [46] "pharmaceutical manufacturing"
## [47] "biotech/pharma"
## [48] "biotech/pharma"
## [49] "pharmaceutical r&d"
## [50] "manufacturing (pharmaceuticals)"
## [51] "pharmaceuticals r&d"
## [52] "pharmaceutical development"
## [53] "retail pharmacy"
## [54] "biotech/pharmaceuticals"
## [55] "pharmacuticals"
## [56] "biotech/pharmaceuticals"
## [57] "biotech / pharmaceutical industry"
## [58] "real world evidence (data for pharma research)"
## [59] "big pharma"
## [60] "pharmaceutical manufacturing"
## [61] "pharmaceutical research (chemist)"
## [62] "pharma/biotech"
## [63] "pharmaceutical/contract research organization"
## [64] "pharmaceutical/biotech"
## [65] "pharmaceutical research"
## [66] "pharma/biotechnology"
## [67] "pharmaceutical industry"
## [68] "pharmaceuticals/biotechnology"
## [69] "medical/pharmaceutical"
## [70] "pharmaceuticals / biotech"
## [71] "language services company, unsure the broad category to use. our clients are branding agencies, and their clients are frequently (but not always) pharmaceutical companies."
## [72] "pharmaceuticals"
## [73] "pharmaceutical industry"
## [74] "pharma & biotech"
## [75] "pharmaceutical/biotechnology"
## [76] "pharmaceitical manufacturing"
## [77] "research scientist, pharma"
## [78] "pharma research"
## [79] "biopharmaceuticals"
## [80] "pharmaceutical company"
## [81] "pharma r&d"
## [82] "pharma/ research"
## [83] "science/biotech"
## [84] "biotechnology, research and development"
## [85] "biotech pharmaceuticals"
## [86] "biotechnology"
## [87] "biotech industry"
## [88] "biotech"
## [89] "probiotics"
## [90] "biological research"
## [91] "biologist"
## [92] "biotech (r&d)"
## [93] "biopharma"
## [94] "pharma/biotech"
## [95] "biotech"
## [96] "biotech/pharma"
## [97] "biotech/pharma"
## [98] "biomedical research"
## [99] "academia--cell and molecular biology"
## [100] "biotech / research"
## [101] "bio tech"
## [102] "biotechnology/life sciences"
## [103] "biotech / life sciences"
## [104] "biology/research"
## [105] "biotech/pharmaceuticals"
## [106] "biotech manufacturing"
## [107] "bioscience company"
## [108] "biotech research"
## [109] "science (research, biology)"
## [110] "biomedical research"
## [111] "biotech/pharmaceuticals"
## [112] "biomedical research"
## [113] "biotech / pharmaceutical industry"
## [114] "biotech/food safety"
## [115] "biological sciences"
## [116] "biotech research"
## [117] "veterinary biotech"
## [118] "pharma/biotech"
## [119] "pharmaceutical/biotech"
## [120] "biotech"
## [121] "biotechnology"
## [122] "lab science (biotech)"
## [123] "pharma/biotechnology"
## [124] "biotech/drug development"
## [125] "pharmaceuticals/biotechnology"
## [126] "pharmaceuticals / biotech"
## [127] "pharma & biotech"
## [128] "pharmaceutical/biotechnology"
## [129] "biotech/software"
## [130] "biopharmaceuticals"
## [131] "bioinformatics"
## [132] "beauty"
## [133] "beauty/service industry"
## [134] "beauty /cpg"
## [135] "beauty, cosmetics, fragrance"
## [136] "beauty manufacturing & education"
## [137] "pharmaceuticals"
## [138] "biotech pharmaceuticals"
## [139] "manufacturing (pharmaceuticals)"
## [140] "pharmaceuticals r&d"
## [141] "biotech/pharmaceuticals"
## [142] "biotech/pharmaceuticals"
## [143] "pharmaceuticals/biotechnology"
## [144] "pharmaceuticals / biotech"
## [145] "pharmaceuticals"
## [146] "biopharmaceuticals"
## [147] "medical research"
## [148] "medical research"
## [149] "third sector/non profit - medical membership in uk"
## [150] "medical sciences"
## [151] "medical devices"
## [152] "pharma / medical device design and manufacturing"
## [153] "medical library"
## [154] "libraries (medical)"
## [155] "biomedical research"
## [156] "medical communications"
## [157] "medical communications"
## [158] "biomedical research"
## [159] "biomedical research"
## [160] "medical technology"
## [161] "stem medical research"
## [162] "medical device"
## [163] "medical research"
## [164] "diagnostic medical devices"
## [165] "medical/pharmaceutical"
## [166] "medical supply wholesale & warehousing"
## [167] "medical technology"
## [168] "medical interpreter -(spanish)"
## [169] "manufacturing (medical devices)"
# Computing through Animal Care ----
# Industry label -> regular expressions tried against `outliers`, in order.
# For each industry, the answers matching each pattern are prepended to the
# industry's entry (so matches of later patterns come first), and the finished
# entry is then prepended to outliers_classified.
# NOTE(review): "pet" is matched as a substring; the printed Animal Care result
# below includes "petroleum" for that reason. Left unchanged so the result
# stays the same.
tech_to_animal_patterns <- list(
  "Computing or Tech" = c(
    "^tech$", "^technology", "software", "internet", "video game",
    "virtual real", "ed-tech", "edtech", "e-commerce", "ecommerce",
    "technology"
  ),
  "Transport or Logistics" = c(
    "distribution", "import", "supply chain", "wholesale", "warehous"
  ),
  "Government and Public Administration" = c(
    "government", "politic", "administration", "public health",
    "urban planning", "union", "cultur", "park"
  ),
  "Property or Construction" = c(
    "architect", "construct", "real estate", "interior", "exterior"
  ),
  "Aerospace & Defence" = c(
    "aerospace", "defense", "defence", "military", "aviation"
  ),
  "Library & Publishing" = c("library", "libra", "publishing", "archiv"),
  "Animal Care" = c("zoo", "veterinary", "veterinarian", "animal", "pet")
)
for (industry_label in names(tech_to_animal_patterns)) {
  for (pattern in tech_to_animal_patterns[[industry_label]]) {
    industry_map[[industry_label]] <- c(
      grep(pattern, outliers, value = TRUE),
      industry_map[[industry_label]]
    )
  }
  outliers_classified <- c(industry_map[[industry_label]], outliers_classified)
}
industry_map[["Animal Care"]]
## [1] "pet care industry (dog training/walking)"
## [2] "pet"
## [3] "pet care industry"
## [4] "pet care"
## [5] "petroleum"
## [6] "pet care/grooming"
## [7] "animal health product manufacturing"
## [8] "animal caretaker"
## [9] "animal health industry"
## [10] "animal health"
## [11] "animal welfare"
## [12] "animal care"
## [13] "animal care"
## [14] "veterinarian"
## [15] "veterinary"
## [16] "veterinary medicine"
## [17] "veterinary care"
## [18] "veterinary services"
## [19] "veterinary biotech"
## [20] "veterinary diagnostics"
## [21] "veterinary m&a"
## [22] "veterinary care"
## [23] "zoo"
## [24] "zoos and aquariums"
## [25] ""
# Food & Beverage through Nonprofits ----
# Industry label -> regular expressions tried against `outliers`, in order.
# For each industry, the answers matching each pattern are prepended to the
# industry's entry (so matches of later patterns come first), and the finished
# entry is then prepended to outliers_classified.
# The key "Food & Bevarage" is spelled exactly as in `non_standard_industry`.
food_to_nonprofit_patterns <- list(
  "Food & Bevarage" = c(
    "wine", "bevarage", "beer", "spirit", "beverage", "food"
  ),
  "Hospitality & Events" = c("restaurant", "food service"),
  "Business or Consulting" = c("consulting", "environmental", "consult"),
  "Automotive" = c("auto"),
  "Recruitment or HR" = c("workforce", "staffing", "human"),
  "Nonprofits" = c("non profit", "non-profit", "nonprofit")
)
for (industry_label in names(food_to_nonprofit_patterns)) {
  for (pattern in food_to_nonprofit_patterns[[industry_label]]) {
    industry_map[[industry_label]] <- c(
      grep(pattern, outliers, value = TRUE),
      industry_map[[industry_label]]
    )
  }
  outliers_classified <- c(industry_map[[industry_label]], outliers_classified)
}
values(industry_map)
## $`Accounting, Banking & Finance`
## [1] "commercial real estate - private equity"
## [2] "private equity"
## [3] "finance"
## [4] "automotive finance and insurance"
## [5] "finance/investment management but in legal/compliance, so back-office"
## [6] "i work in the finance function of a large global conglomerate"
## [7] "finance"
## [8] "professional association in finance"
## [9] ""
##
## $`Aerospace & Defence`
## [1] "aerospace/aviation"
## [2] "aviation"
## [3] "instructional design, aviation industry"
## [4] "aerospace/aviation"
## [5] "aviation"
## [6] "military"
## [7] "international defence"
## [8] "defense"
## [9] "aerospace/defense"
## [10] "defense contracting"
## [11] "defense contracting"
## [12] "research & development (defense industry)"
## [13] "aerospace and defense"
## [14] "aerospace & defense"
## [15] "aerospace and defense manufacturing"
## [16] "defense contractor"
## [17] "aerospace and defense/government contracting"
## [18] "aerospace/aviation"
## [19] "aerospace contracting"
## [20] "aerospace"
## [21] "aerospace/defense"
## [22] "aerospace data"
## [23] "aerospace/aviation"
## [24] "aerospace and defense"
## [25] "aerospace & defense"
## [26] "aerospace and defense manufacturing"
## [27] "aerospace and defense/government contracting"
## [28] ""
##
## $`Agriculture or Forestry`
## [1] ""
##
## $`Animal Care`
## [1] "pet care industry (dog training/walking)"
## [2] "pet"
## [3] "pet care industry"
## [4] "pet care"
## [5] "petroleum"
## [6] "pet care/grooming"
## [7] "animal health product manufacturing"
## [8] "animal caretaker"
## [9] "animal health industry"
## [10] "animal health"
## [11] "animal welfare"
## [12] "animal care"
## [13] "animal care"
## [14] "veterinarian"
## [15] "veterinary"
## [16] "veterinary medicine"
## [17] "veterinary care"
## [18] "veterinary services"
## [19] "veterinary biotech"
## [20] "veterinary diagnostics"
## [21] "veterinary m&a"
## [22] "veterinary care"
## [23] "zoo"
## [24] "zoos and aquariums"
## [25] ""
##
## $`Art & Design`
## [1] ""
##
## $Automotive
## [1] "automotive" "auto repair"
## [3] "automotive finance and insurance" "automotive repair"
## [5] "automtive repair" "automotive technician"
## [7] "automotive repair" "auto repair"
## [9] "auto mfg." ""
##
## $Biotechnology
## [1] ""
##
## $`Business or Consulting`
## [1] "not-for-profit health research consulting"
## [2] "environmental consulting"
## [3] "specialist policy consulting/research"
## [4] "not for profit education consultancy"
## [5] "management consulting"
## [6] "consulting"
## [7] "political consulting"
## [8] "environmental consulting"
## [9] "lobbying and consulting"
## [10] "architecture and engineering consulting and design"
## [11] "engineering and environmental consulting"
## [12] "consultant"
## [13] "education consulting"
## [14] "political consulting"
## [15] "social science research - not quite academia, not quite nonprofit, not quite consulting"
## [16] "global health consulting"
## [17] "strategy consulting"
## [18] "consulting"
## [19] "consulting / professional services"
## [20] "env. consulting"
## [21] "freelance/self-employed consultant"
## [22] "consulting operations- big 4"
## [23] "grantwriting consultants"
## [24] "environmental consultanting"
## [25] "environmental"
## [26] "environmental science"
## [27] "environmental consulting"
## [28] "environmental regulation"
## [29] "environmental/cultural resource management"
## [30] "environmental sciences"
## [31] "environmental planning"
## [32] "public/environmental health"
## [33] "environmental health + pest control"
## [34] "environmental consulting"
## [35] "engineering and environmental consulting"
## [36] "environmental health and safety compliance"
## [37] "environmental survey"
## [38] "environmental sciences"
## [39] "environmental compliance"
## [40] "environmental compliance/engineering"
## [41] "environmental restoration"
## [42] "environmental services"
## [43] "environmental"
## [44] "environmental compliance"
## [45] "environmental health and safety"
## [46] "environmental consultanting"
## [47] "environmental health and safety"
## [48] "not-for-profit health research consulting"
## [49] "environmental consulting"
## [50] "specialist policy consulting/research"
## [51] "management consulting"
## [52] "consulting"
## [53] "political consulting"
## [54] "environmental consulting"
## [55] "lobbying and consulting"
## [56] "architecture and engineering consulting and design"
## [57] "engineering and environmental consulting"
## [58] "education consulting"
## [59] "political consulting"
## [60] "social science research - not quite academia, not quite nonprofit, not quite consulting"
## [61] "global health consulting"
## [62] "strategy consulting"
## [63] "consulting"
## [64] "consulting / professional services"
## [65] "env. consulting"
## [66] "consulting operations- big 4"
## [67] ""
##
## $Church
## [1] ""
##
## $`Computing or Tech`
## [1] "biotechnology, research and development"
## [2] "biotechnology"
## [3] "educational technology - hybrid between book publishing and technology really"
## [4] "technology/saas"
## [5] "educational technology"
## [6] "information technology"
## [7] "biotechnology/life sciences"
## [8] "technology"
## [9] "marketing technology"
## [10] "medical technology"
## [11] "healthcare information technology"
## [12] "biotechnology"
## [13] "pharma/biotechnology"
## [14] "ecommerce - technology"
## [15] "information technology (it)"
## [16] "pharmaceuticals/biotechnology"
## [17] "healthcare technology"
## [18] "medical technology"
## [19] "pharmaceutical/biotechnology"
## [20] "ecommerce"
## [21] "ecommerce fraud"
## [22] "ecommerce - technology"
## [23] "ecommerce"
## [24] "e-commerce"
## [25] "fashion/e-commerce"
## [26] "e-commerce"
## [27] "edtech"
## [28] "publishing/edtech"
## [29] "education research- mix of edtech and non profits"
## [30] "edtech"
## [31] "virtual reality"
## [32] "video games"
## [33] "video games"
## [34] "video game industry"
## [35] "video games"
## [36] "internet"
## [37] "software development / it"
## [38] "saas company/software"
## [39] "software as a service saas"
## [40] "payroll software"
## [41] "govtech software as a service"
## [42] "software/programming"
## [43] "software"
## [44] "real estate software"
## [45] "software development"
## [46] "software products"
## [47] "biotech/software"
## [48] "technology/saas"
## [49] "technology"
## [50] "tech"
## [51] ""
##
## $`Education (Higher Education)`
## [1] "college athletics"
## [2] "large university administration"
## [3] "library (university)"
## [4] "university administration"
## [5] "university tech transfer (higher ed/marketing/writing)"
## [6] "museum (university affiliated)"
## [7] "fundraising for a university"
## [8] "university research"
## [9] "research at a state university"
## [10] "library at a university"
## [11] "university libraries"
## [12] "hybrid nonprofit higher education (we are part of a university but our entire budget comes from grants)"
## [13] "public health in higher education"
## [14] "fundraising in higher education; nonclinical, nonacademic"
## [15] "data/institutional research in higher education"
## [16] "hybrid nonprofit higher education (we are part of a university but our entire budget comes from grants)"
## [17] "higher education/libraries"
## [18] "higher education fundraising"
## [19] "academic research"
## [20] "academia"
## [21] "research/academia"
## [22] "academic publishing"
## [23] "academic science"
## [24] "academic medicine"
## [25] "academic publishing"
## [26] "academic research (psychology)"
## [27] "academic/nonprofit research"
## [28] "libraries and archives (academic)"
## [29] "academic science"
## [30] "academia--cell and molecular biology"
## [31] "fundraising in higher education; nonclinical, nonacademic"
## [32] "academic scientific research"
## [33] "academic research (social science)"
## [34] "science/research (academia)"
## [35] "science academia"
## [36] "academia / research"
## [37] "publishing (academic)"
## [38] "research - academic"
## [39] "research and development academia"
## [40] "academic research"
## [41] "academic press production"
## [42] "publishing: science, academic, technical"
## [43] "academia - stem"
## [44] "academic publishing"
##
## $`Education (Primary/Secondary)`
## [1] ""
##
## $`Engineering or Manufacturing`
## [1] "oil and gas"
## [2] "trades (supply chain) oil and gas"
## [3] "environment - oil and gas"
## [4] "oil & gas"
## [5] "oilfield adjacent"
## [6] "oil and gas"
## [7] "oil and gas exploration"
## [8] "oil"
## [9] "gas & oil"
## [10] "oil and gas safety training"
## [11] "oil & gas"
## [12] "energy: oil & gas"
## [13] "oil & gas - non destructive testing"
## [14] "energy (oil & gas & associated products, renewable power, etc)"
## [15] "energy - oil and gas"
## [16] "energy, oil & gas"
## [17] "energy/oil"
## [18] "energy sector: oil & gas"
## [19] "energy (oil & gas)"
## [20] "oil & gas"
## [21] "mining"
## [22] "mining and natural resources"
## [23] "mining & mineral processing"
## [24] "construction, mining, manufacturing"
## [25] "engineering - mining"
## [26] "mining/resource extraction"
## [27] "mining/mineral exploration"
## [28] "manufacturing"
## [29] "construction, mining, manufacturing"
## [30] "manufacturing security systems"
## [31] "soft drinks manufacturing"
## [32] "security and manufacturing company"
## [33] "animal health product manufacturing"
## [34] "manufacturing and distributing"
## [35] "apparel manufacture"
## [36] "pharma / medical device design and manufacturing"
## [37] "chemical manufacturing"
## [38] "manufacturing"
## [39] "high end outdoor furniture manufacturer"
## [40] "pharmaceutical manufacturing"
## [41] "synthetic chemical manufacturing"
## [42] "manufacturing (pharmaceuticals)"
## [43] "manufacturing/wholesale"
## [44] "cbd manufacturing"
## [45] "manufacturing : corporate admin support"
## [46] "biotech manufacturing"
## [47] "production and manufacturing"
## [48] "pharmaceutical manufacturing"
## [49] "manufacturing, chemical"
## [50] "r&d in manufacturing"
## [51] "wholesale textile manufacture and sales"
## [52] "manufacturing"
## [53] "pharmaceitical manufacturing"
## [54] "beauty manufacturing & education"
## [55] "manufacturing (medical devices)"
## [56] "manufacturing (personal care)"
##
## $Entertainment
## [1] ""
##
## $Fashion
## [1] ""
##
## $`Food & Bevarage`
## [1] "food processing and packaging"
## [2] "food processing"
## [3] "food distribution"
## [4] "food production"
## [5] "food"
## [6] "food production/processing"
## [7] "food and drink"
## [8] "food and beverage"
## [9] "food manufacturers"
## [10] "food manufacture"
## [11] "food manufacturing"
## [12] "food service"
## [13] "food service"
## [14] "food & beverage"
## [15] "food & beverage production"
## [16] "food and flavor"
## [17] "food demos"
## [18] "restaurant/food service"
## [19] "food & nutrition"
## [20] "biotech/food safety"
## [21] "food manufacturing"
## [22] "food/beverage manufacturing- quality/laboratory"
## [23] "food processing"
## [24] "research and development, food and beverage"
## [25] "food service --- baking"
## [26] "food distribution"
## [27] "food & beverages"
## [28] "warehouse- food and beverage"
## [29] "administration (food service)"
## [30] "food industry"
## [31] "food/quick service restaurant (qsr)"
## [32] "food industry"
## [33] "consumer food products"
## [34] "foodservice"
## [35] "fast food"
## [36] "food and beverage"
## [37] "food & beverage"
## [38] "food & beverage production"
## [39] "beverage production"
## [40] "food/beverage manufacturing- quality/laboratory"
## [41] "research and development, food and beverage"
## [42] "food & beverages"
## [43] "beverage"
## [44] "warehouse- food and beverage"
## [45] "beverage distribution"
## [46] "beverage & spirits"
## [47] "faith/spirituality"
## [48] "beverage & spirits"
## [49] "craft beer industry"
## [50] "beer sales"
## [51] "wine importing/distribution"
## [52] "wine wholesale"
## [53] "wine"
## [54] "winery regulatory compliance"
## [55] ""
##
## $`Government and Public Administration`
## [1] "parks and recreation, land management but with customer service included"
## [2] "parking"
## [3] "horticulture (admin)"
## [4] "cultural (museums/galleries)"
## [5] "culture"
## [6] "arts, culture and heritage"
## [7] "archaeology / cultural resource management"
## [8] "environmental/cultural resource management"
## [9] "cultural resources management/major univ."
## [10] "cultural heritage"
## [11] "agriculture/agriculture chemical"
## [12] "cultural resource management"
## [13] "horticulture"
## [14] "archaeology/cultural resource manager"
## [15] "labor union"
## [16] "wherever i'm assigned via the union"
## [17] "labor union"
## [18] "union/political organizing"
## [19] "unions"
## [20] "urban planning"
## [21] "public health"
## [22] "public health in higher education"
## [23] "public health"
## [24] "public health research"
## [25] "public health"
## [26] "research - public health"
## [27] "public health- state level"
## [28] "public health, local government"
## [29] "public health (not medical)"
## [30] "large university administration"
## [31] "administration, it"
## [32] "university administration"
## [33] "research administration"
## [34] "administration"
## [35] "office administration"
## [36] "administration (food service)"
## [37] "arts administration"
## [38] "pension benefit administration"
## [39] "benefits administration"
## [40] "politics"
## [41] "political campaigning"
## [42] "political campaigns"
## [43] "political consulting"
## [44] "politics/campaigns"
## [45] "politics/campaigns"
## [46] "union/political organizing"
## [47] "political consulting"
## [48] "political campaign"
## [49] "politics/government relations"
## [50] "political research"
## [51] "government research"
## [52] "tourism/heritage -- but for a government building"
## [53] "government relation"
## [54] "government contract"
## [55] "intergovernmental organization"
## [56] "government contractor"
## [57] "\"government relations\" (lobbying)"
## [58] "municipal government (library)"
## [59] "government- scientist"
## [60] "government relations/lobbying"
## [61] "government contracting"
## [62] "federal government contracting"
## [63] "government contracting"
## [64] "government contractor"
## [65] "government affairs/lobbying"
## [66] "public library (might be considered government, but that always seems an odd designation...)"
## [67] "science research, government"
## [68] "government relations"
## [69] "public library (non-profit, but also government?)"
## [70] "government contractor (r&d)"
## [71] "government relations"
## [72] "government contracting (data analytics and program evaluations)"
## [73] "science/government"
## [74] "government contractor, international development"
## [75] "aerospace and defense/government contracting"
## [76] "public health, local government"
## [77] "politics/government relations"
## [78] "government"
## [79] "government contracting r&d"
## [80] ""
##
## $`Health care`
## [1] "not-for-profit health research consulting"
## [2] "research (health)"
## [3] "public health"
## [4] "animal health product manufacturing"
## [5] "public health in higher education"
## [6] "health insurance"
## [7] "healthcare it"
## [8] "public health"
## [9] "public health research"
## [10] "public/environmental health"
## [11] "environment, health, and safety"
## [12] "environmental health + pest control"
## [13] "public health"
## [14] "behavioral health"
## [15] "health research"
## [16] "research - public health"
## [17] "mental health"
## [18] "mental health"
## [19] "mental health therapist"
## [20] "environmental health and safety compliance"
## [21] "health and fitness"
## [22] "healthcare information technology"
## [23] "animal health industry"
## [24] "global health consulting"
## [25] "animal health"
## [26] "non-profit health care (i couldn’t select both)"
## [27] "public health- state level"
## [28] "mental health"
## [29] "behavior analysis/mental health"
## [30] "healthcare technology"
## [31] "public health, local government"
## [32] "public health (not medical)"
## [33] "environmental health and safety"
## [34] "environmental health and safety"
## [35] "pharmaceutical"
## [36] "pharmaceutical research"
## [37] "pharmaceuticals"
## [38] "biotech pharmaceuticals"
## [39] "pharma"
## [40] "pharmaceutical research & development"
## [41] "pharma"
## [42] "biopharma"
## [43] "pharma / medical device design and manufacturing"
## [44] "pharmaceutical"
## [45] "pharma/biotech"
## [46] "pharmaceutical manufacturing"
## [47] "biotech/pharma"
## [48] "biotech/pharma"
## [49] "pharmaceutical r&d"
## [50] "manufacturing (pharmaceuticals)"
## [51] "pharmaceuticals r&d"
## [52] "pharmaceutical development"
## [53] "retail pharmacy"
## [54] "biotech/pharmaceuticals"
## [55] "pharmacuticals"
## [56] "biotech/pharmaceuticals"
## [57] "biotech / pharmaceutical industry"
## [58] "real world evidence (data for pharma research)"
## [59] "big pharma"
## [60] "pharmaceutical manufacturing"
## [61] "pharmaceutical research (chemist)"
## [62] "pharma/biotech"
## [63] "pharmaceutical/contract research organization"
## [64] "pharmaceutical/biotech"
## [65] "pharmaceutical research"
## [66] "pharma/biotechnology"
## [67] "pharmaceutical industry"
## [68] "pharmaceuticals/biotechnology"
## [69] "medical/pharmaceutical"
## [70] "pharmaceuticals / biotech"
## [71] "language services company, unsure the broad category to use. our clients are branding agencies, and their clients are frequently (but not always) pharmaceutical companies."
## [72] "pharmaceuticals"
## [73] "pharmaceutical industry"
## [74] "pharma & biotech"
## [75] "pharmaceutical/biotechnology"
## [76] "pharmaceitical manufacturing"
## [77] "research scientist, pharma"
## [78] "pharma research"
## [79] "biopharmaceuticals"
## [80] "pharmaceutical company"
## [81] "pharma r&d"
## [82] "pharma/ research"
## [83] "science/biotech"
## [84] "biotechnology, research and development"
## [85] "biotech pharmaceuticals"
## [86] "biotechnology"
## [87] "biotech industry"
## [88] "biotech"
## [89] "probiotics"
## [90] "biological research"
## [91] "biologist"
## [92] "biotech (r&d)"
## [93] "biopharma"
## [94] "pharma/biotech"
## [95] "biotech"
## [96] "biotech/pharma"
## [97] "biotech/pharma"
## [98] "biomedical research"
## [99] "academia--cell and molecular biology"
## [100] "biotech / research"
## [101] "bio tech"
## [102] "biotechnology/life sciences"
## [103] "biotech / life sciences"
## [104] "biology/research"
## [105] "biotech/pharmaceuticals"
## [106] "biotech manufacturing"
## [107] "bioscience company"
## [108] "biotech research"
## [109] "science (research, biology)"
## [110] "biomedical research"
## [111] "biotech/pharmaceuticals"
## [112] "biomedical research"
## [113] "biotech / pharmaceutical industry"
## [114] "biotech/food safety"
## [115] "biological sciences"
## [116] "biotech research"
## [117] "veterinary biotech"
## [118] "pharma/biotech"
## [119] "pharmaceutical/biotech"
## [120] "biotech"
## [121] "biotechnology"
## [122] "lab science (biotech)"
## [123] "pharma/biotechnology"
## [124] "biotech/drug development"
## [125] "pharmaceuticals/biotechnology"
## [126] "pharmaceuticals / biotech"
## [127] "pharma & biotech"
## [128] "pharmaceutical/biotechnology"
## [129] "biotech/software"
## [130] "biopharmaceuticals"
## [131] "bioinformatics"
## [132] "beauty"
## [133] "beauty/service industry"
## [134] "beauty /cpg"
## [135] "beauty, cosmetics, fragrance"
## [136] "beauty manufacturing & education"
## [137] "pharmaceuticals"
## [138] "biotech pharmaceuticals"
## [139] "manufacturing (pharmaceuticals)"
## [140] "pharmaceuticals r&d"
## [141] "biotech/pharmaceuticals"
## [142] "biotech/pharmaceuticals"
## [143] "pharmaceuticals/biotechnology"
## [144] "pharmaceuticals / biotech"
## [145] "pharmaceuticals"
## [146] "biopharmaceuticals"
## [147] "medical research"
## [148] "medical research"
## [149] "third sector/non profit - medical membership in uk"
## [150] "medical sciences"
## [151] "medical devices"
## [152] "pharma / medical device design and manufacturing"
## [153] "medical library"
## [154] "libraries (medical)"
## [155] "biomedical research"
## [156] "medical communications"
## [157] "medical communications"
## [158] "biomedical research"
## [159] "biomedical research"
## [160] "medical technology"
## [161] "stem medical research"
## [162] "medical device"
## [163] "medical research"
## [164] "diagnostic medical devices"
## [165] "medical/pharmaceutical"
## [166] "medical supply wholesale & warehousing"
## [167] "medical technology"
## [168] "medical interpreter -(spanish)"
## [169] "manufacturing (medical devices)"
##
## $`Hospitality & Events`
## [1] "food service" "food service"
## [3] "restaurant/food service" "food service --- baking"
## [5] "administration (food service)" "restaurant"
## [7] "restaurant/food service" "restaurant group"
## [9] "restaurant/service" "fast casual restaurant"
## [11] "food/quick service restaurant (qsr)" "restaurants & hospitality"
## [13] ""
##
## $Insurance
## [1] ""
##
## $Law
## [1] ""
##
## $`Law Enforcement & Security`
## [1] ""
##
## $`Leisure, Sport & Tourism`
## [1] ""
##
## $`Library & Publishing`
## [1] "libraries and archives"
## [2] "information management/archives"
## [3] "archives"
## [4] "libraries & archives"
## [5] "libraries and archives (academic)"
## [6] "library/archives"
## [7] "library/archive"
## [8] "library/archive/research center"
## [9] "archives/library science"
## [10] "museums & archives"
## [11] "libraries / archives / information"
## [12] "archives/libraries"
## [13] "libraries/museums/archives"
## [14] "libraries/archives"
## [15] "museums & archives (not sure where this would fall)"
## [16] "publishing"
## [17] "educational publishing"
## [18] "publishing (book)"
## [19] "scientific publishing"
## [20] "academic publishing"
## [21] "educational technology - hybrid between book publishing and technology really"
## [22] "publishing"
## [23] "academic publishing"
## [24] "editor in educational publishing"
## [25] "education publishing"
## [26] "educational publishing"
## [27] "book publishing"
## [28] "book publishing"
## [29] "publishing/edtech"
## [30] "digital marketing within a book publishing company (please reclassify as you see fit)"
## [31] "customer service/publishing-adjacent"
## [32] "publishing (academic)"
## [33] "educational publishing / ed tech"
## [34] "science publishing"
## [35] "publishing: science, academic, technical"
## [36] "tabletop games publishing"
## [37] "academic publishing"
## [38] "publishing, content as a service"
## [39] "print publishing"
## [40] "libraries and archives"
## [41] "library"
## [42] "libraries"
## [43] "library"
## [44] "public library"
## [45] "law library"
## [46] "library tech for a school system"
## [47] "public library"
## [48] "public libraries"
## [49] "library (university)"
## [50] "library/information managment"
## [51] "museum library"
## [52] "public library"
## [53] "public library (technically city govt.?)"
## [54] "librarian"
## [55] "public librarian"
## [56] "municipal library"
## [57] "medical library"
## [58] "libraries & archives"
## [59] "library (its a non-profit and its a govt job - how would i list that? not all libraries are govt jobs)"
## [60] "libraries and archives (academic)"
## [61] "municipal government (library)"
## [62] "library/archives"
## [63] "special collections library"
## [64] "libraries (medical)"
## [65] "librarian--contractor for nasa"
## [66] "library page (public county library)"
## [67] "information services (libraries)"
## [68] "public libraries"
## [69] "public libraries"
## [70] "library/archive"
## [71] "librarian and assistant manager of a library"
## [72] "librarian in legal setting"
## [73] "professional public librarian"
## [74] "public library (might be considered government, but that always seems an odd designation...)"
## [75] "library/archive/research center"
## [76] "archives/library science"
## [77] "public library (non-profit, but also government?)"
## [78] "library--public"
## [79] "municipal (public) libraries"
## [80] "information services (library)"
## [81] "libraries / archives / information"
## [82] "libraries (public)"
## [83] "libraries"
## [84] "library science / part-time work/study"
## [85] "libraries (public)"
## [86] "archives/libraries"
## [87] "library and information services"
## [88] "library and information science"
## [89] "libraries/museums/archives"
## [90] "library at a university"
## [91] "university libraries"
## [92] "libraries/archives"
## [93] "libraries"
## [94] "higher education/libraries"
## [95] "public library"
## [96] "information services/libraries"
## [97] "public/research library"
## [98] "library"
## [99] "library"
## [100] "public library"
## [101] "law library"
## [102] "library tech for a school system"
## [103] "public library"
## [104] "library (university)"
## [105] "library/information managment"
## [106] "museum library"
## [107] "public library"
## [108] "public library (technically city govt.?)"
## [109] "municipal library"
## [110] "medical library"
## [111] "library (its a non-profit and its a govt job - how would i list that? not all libraries are govt jobs)"
## [112] "municipal government (library)"
## [113] "library/archives"
## [114] "special collections library"
## [115] "library page (public county library)"
## [116] "library/archive"
## [117] "librarian and assistant manager of a library"
## [118] "public library (might be considered government, but that always seems an odd designation...)"
## [119] "library/archive/research center"
## [120] "archives/library science"
## [121] "public library (non-profit, but also government?)"
## [122] "library--public"
## [123] "information services (library)"
## [124] "library science / part-time work/study"
## [125] "library and information services"
## [126] "library and information science"
## [127] "library at a university"
## [128] "public library"
## [129] "public/research library"
## [130] ""
##
## $`Marketing, Advertising & PR`
## [1] ""
##
## $`Media & Digital`
## [1] ""
##
## $`Mining and Mentals`
## [1] ""
##
## $Nonprofits
## [1] "computing/tech + higher ed + nonprofit"
## [2] "museum - nonprofit"
## [3] "academic/nonprofit research"
## [4] "museums: nonprofit"
## [5] "nonprofit scholarly society publisher"
## [6] "social science research - not quite academia, not quite nonprofit, not quite consulting"
## [7] "nonprofit - legal department"
## [8] "nonprofit - lort d theater"
## [9] "nonprofit association"
## [10] "affordable housing real estate development (nonprofit)"
## [11] "hybrid nonprofit higher education (we are part of a university but our entire budget comes from grants)"
## [12] "library (its a non-profit and its a govt job - how would i list that? not all libraries are govt jobs)"
## [13] "public library (non-profit, but also government?)"
## [14] "non-profit health care (i couldn’t select both)"
## [15] "non-profit theatre"
## [16] "third sector/non profit - medical membership in uk"
## [17] "marketing at a non profit"
## [18] "non profit theater"
## [19] "non profit theater"
## [20] "education research- mix of edtech and non profits"
## [21] ""
##
## $`Property or Construction`
## [1] "interior design & architecture"
## [2] "interior design (commercial)"
## [3] "interior landscaping"
## [4] "real estate development"
## [5] "commercial real estate tenancy"
## [6] "real estate"
## [7] "real estate"
## [8] "commercial real estate"
## [9] "real estate services"
## [10] "real estate corp. office/not a realtor"
## [11] "real estate investment"
## [12] "title/real estate"
## [13] "real estate"
## [14] "real estate: title & escrow"
## [15] "commercial real estate - private equity"
## [16] "real estate/ mortgage"
## [17] "real estate investment support"
## [18] "real estate software"
## [19] "real estate/development"
## [20] "retail real estate"
## [21] "real estate affordable housing"
## [22] "affordable housing real estate development (nonprofit)"
## [23] "real estate servicea"
## [24] "real estate customer care"
## [25] "real estate valuation"
## [26] "real estate title company"
## [27] "real estate association"
## [28] "commercial real estate data and analytics/research"
## [29] "construction"
## [30] "construction"
## [31] "construction, mining, manufacturing"
## [32] "construction / stone industry"
## [33] "construction, hvac"
## [34] "architecture/construction"
## [35] "construction management"
## [36] "architecture, engineering, construction"
## [37] "architecture & construction"
## [38] "architecture"
## [39] "interior design & architecture"
## [40] "architecture"
## [41] "architect"
## [42] "architecture and engineering consulting and design"
## [43] "architecture / engineering"
## [44] "architectural/land planning/civil engineering"
## [45] "architecture/construction"
## [46] "professional services / architecture"
## [47] "landscape architecture"
## [48] "architecture, engineering, construction"
## [49] "architecture & construction"
## [50] ""
##
## $`Recruitment or HR`
## [1] "human resources" "human capital management"
## [3] "human services" "staffing industrry"
## [5] "staffing agency" "staffing firm"
## [7] "staffing & workforce solutions" "workforce development"
## [9] "staffing & workforce solutions" ""
##
## $Retail
## [1] ""
##
## $Sales
## [1] "procurement" "procurement/sourcing/operations"
## [3] "sourcing & procurement" ""
##
## $`Social Work`
## [1] ""
##
## $`Transport or Logistics`
## [1] "warehousing"
## [2] "warehouse- food and beverage"
## [3] "medical supply wholesale & warehousing"
## [4] "warehouse"
## [5] "wholesale distribution b2b"
## [6] "wholesale and retail trade"
## [7] "wholesale distribution"
## [8] "wholesale - apparel"
## [9] "manufacturing/wholesale"
## [10] "wholesale trade"
## [11] "wholesale supplier"
## [12] "wholesale/distrbution"
## [13] "wine wholesale"
## [14] "children's book wholesale"
## [15] "wholesale"
## [16] "wholesale textile manufacture and sales"
## [17] "medical supply wholesale & warehousing"
## [18] "wholesale industrial & welding supplies & equipment"
## [19] "trades (supply chain) oil and gas"
## [20] "supply chain"
## [21] "supply chain distribution"
## [22] "supply chain"
## [23] "supply chain!"
## [24] "wine importing/distribution"
## [25] "coffee - importing"
## [26] "wholesale distribution b2b"
## [27] "food distribution"
## [28] "wine importing/distribution"
## [29] "wholesale distribution"
## [30] "supply chain distribution"
## [31] "distribution"
## [32] "grocery distribution"
## [33] "commercial building material distribution"
## [34] "food distribution"
## [35] "beverage distribution"
## [36] ""
##
## $`Utilities & Telecommunications`
## [1] ""
# Check which outliers remain: count, per industry, the rows of
# outliers_table whose industry is not listed in outliers_classified.
outliers_left <- outliers_table %>%
  filter(!(industry %in% outliers_classified)) %>%
  group_by(industry) %>%
  summarise(total = n())
outliers_left

The remaining data is too fragmented to be worth further cleaning.
# Seed industry_cleaned with the answers that already match a standard
# industry (every other row starts as NA) and keep a lower-cased copy of the
# raw answer for the map lookups below.
test <- ask_a_manager_2021_cleaned %>%
  mutate(
    industry_cleaned = case_when(industry %in% standard_industry ~ industry),
    industry_lower = tolower(industry)
  )

# Remove the "NA" key from industry_map before it is used for lookups.
del("NA", industry_map)

# For each standard industry, relabel the rows whose lower-cased answer is
# among the values stored under that key in industry_map; all other rows keep
# whatever label they already have.
for (industry_key in standard_industry) {
  test <- test %>%
    mutate(
      industry_cleaned = replace(
        industry_cleaned,
        industry_lower %in% values(industry_map, keys = industry_key),
        industry_key
      )
    )
}
ask_a_manager_2021_cleaned <- test

Adding ISCO classification to the dataset
# Classify every survey response into an ISCO level-2 occupation group with
# labourR, using the response timestamp as the row identifier.
# NOTE(review): the classifier is fed the `industry` column; `job_title` looks
# like the more natural input for an occupation classifier - confirm intent.
occupation_classification <- labourR::classify_occupation(
  corpus = ask_a_manager_2021_cleaned,
  id_col = "timestamp", # full argument name; `id =` relied on partial matching
  text_col = "industry",
  lang = "en",
  num_leaves = 10,
  isco_level = 2
) %>%
  # Both sides of the join below are passed through ymd_hms() so the key is
  # parsed the same way in each table.
  mutate(timestamp = ymd_hms(timestamp))

ask_a_manager_2021_cleaned <- ask_a_manager_2021_cleaned %>%
  mutate(timestamp = ymd_hms(timestamp))

# timestamp is not guaranteed to be unique across responses. Joining on a
# duplicated key would silently multiply survey rows, so the lookup side is
# reduced to one classification per timestamp before joining.
ask_a_manager_2021_cleaned <- ask_a_manager_2021_cleaned %>%
  left_join(
    distinct(occupation_classification, timestamp, .keep_all = TRUE),
    by = "timestamp"
  ) %>%
  clean_names()

# Distinct ISCO group labels produced by the classifier.
labR_classifications <- unique(occupation_classification$preferredLabel)
labR_classifications
## [1] "Teaching professionals"
## [2] "Science and engineering professionals"
## [3] "Business and administration professionals"
## [4] "Business and administration associate professionals"
## [5] "Legal, social, cultural and related associate professionals"
## [6] "Science and engineering associate professionals"
## [7] "Administrative and commercial managers"
## [8] "Production and specialised services managers"
## [9] "Legal, social and cultural professionals"
## [10] "Information and communications technology professionals"
## [11] "Hospitality, retail and other services managers"
## [12] "Personal service workers"
## [13] "Health professionals"
## [14] "Stationary plant and machine operators"
## [15] "Health associate professionals"
## [16] "Food processing, wood working, garment and other craft and related trades workers"
## [17] "Electrical and electronic trades workers"
## [18] "Protective services workers"
## [19] "Labourers in mining, construction, manufacturing and transport"
## [20] "Drivers and mobile plant operators"
## [21] "Armed forces occupations, other ranks"
## [22] "Metal, machinery and related trades workers"
## [23] "Other clerical support workers"
## [24] "Information and communications technicians"
## [25] "Customer services clerks"
## [26] "Assemblers"
## [27] "Sales workers"
## [28] "Handicraft and printing workers"
## [29] "Personal care workers"
## [30] "Market-oriented skilled agricultural workers"
## [31] "Numerical and material recording clerks"
## [32] "Chief executives, senior officials and legislators"
# Write the cleaned survey data to data/ask_manager_2021_cleaned.csv
# (path resolved relative to the project root by here::here()).
ask_a_manager_2021_cleaned %>%
  write_csv(here::here("data", "ask_manager_2021_cleaned.csv"))